#!/usr/bin/python

import pypandoc as pypandoc
import regex as re


def transfer(input_file_path: str, input_file_format: str, output_file_path: str, output_file_format: str,
             *, css_path: str = "epub.css", cover_image_path: str = "123.jpg") -> None:
    """Preprocess a text file and convert it to another format with pandoc.

    The input file is first passed to ``format_input_file`` (which rewrites
    it in place), and is then converted through ``pypandoc.convert_file``.

    Args:
        input_file_path: Path of the file to convert. It is modified in place
            by the preprocessing step before the conversion runs.
        input_file_format: Pandoc name of the input format.
        output_file_path: Path the converted document is written to.
        output_file_format: Pandoc name of the output format.
        css_path: Stylesheet handed to pandoc via ``--css``. Defaults to the
            previously hard-coded ``"epub.css"``.
        cover_image_path: Cover image handed to pandoc via
            ``--epub-cover-image``. Defaults to the previously hard-coded
            ``"123.jpg"``.
    """
    format_input_file(input_file_path)

    # NOTE(review): relative paths here are presumably resolved by pandoc
    # against the current working directory -- confirm against the caller.
    pandoc_args = ["--css", css_path, "--epub-cover-image", cover_image_path]
    pypandoc.convert_file(
        source_file=input_file_path,
        to=output_file_format,
        format=input_file_format,
        outputfile=output_file_path,
        extra_args=pandoc_args,
    )


def format_input_file(input_file_path: str) -> None:
    """Prefix heading lines of a UTF-8 text file with ``"# "``, in place.

    A line is treated as a heading when either:

    * its content, ignoring surrounding whitespace, is exactly ``内容简介：``
      (the synopsis title), or
    * it matches the chapter/volume pattern: optional leading whitespace,
      ``第`` or ``卷``, an optional run of numeral characters, then ``章`` or
      ``卷``.

    All other lines are written back unchanged. Lines that already start with
    ``#`` match neither rule, so running the function twice does not stack
    prefixes.

    Args:
        input_file_path: Path of the file to rewrite.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
    """
    # TODO: for large inputs, count the lines first, split the work across
    # threads, and write the combined result back at the end.
    # NOTE: an earlier (disabled) approach wrote a cleaned copy to
    # "<path>.format", dropping blank lines and runs of space separators.
    synopsis_title = "内容简介："
    # Compiled once so the pattern lookup is not repeated for every line.
    heading_pattern = re.compile(r'^\s*[第卷][0123456789ⅠI一二三四五六七八九十零序〇百千两]*[章卷].*')

    with open(input_file_path, mode="r", encoding="utf8") as source:
        # Lines keep their trailing newline, so the synopsis title must be
        # compared after stripping; a raw ``==`` would never match.
        formatted_lines = [
            "# " + line
            if line.strip() == synopsis_title or heading_pattern.match(line)
            else line
            for line in source
        ]

    # Rewrite only after the read handle is closed; "w" truncates, so no
    # stale bytes can be left behind the new content.
    with open(input_file_path, mode="w", encoding="utf8") as target:
        target.writelines(formatted_lines)
